%% read data
% Load the comma-separated data set: the first two columns are used as the
% input features and the third column as the target.
data = csvread('ex1data2.txt');
X = data(:, 1:2);
y = data(:, 3);
num_iters = 20;
m = length(y);

% featureNormalize is defined elsewhere; presumably it rescales each
% feature column -- confirm against its definition. The column of ones is
% prepended afterwards so the intercept term is not passed through it.
X = featureNormalize(X);
X = [ones(m,1), X];

%% test learning rate
% Run gradient descent from a zero start once per candidate learning rate
% and overlay log(cost) against the iteration index so the rates can be
% compared on a single set of axes.
alphas = [0.01, 0.03, 0.1, 0.3, 1, 1.4];

% The largest rate is drawn dotted to set it apart from the others.
line_styles = {'-', '-', '-', '-', '-', ':'};

% Size the initial parameter vector from X rather than hard-coding 3 rows.
initial_theta = zeros(size(X, 2), 1);

labels = cell(1, numel(alphas));
for k = 1:numel(alphas)
    % The second output is treated as the per-iteration cost history
    % (it is plotted against 1:num_iters below).
    [~, J_history] = gradientDescentMulti(X, y, initial_theta, alphas(k), num_iters);

    h = plot(1:num_iters, log(J_history));
    set(h, 'LineStyle', line_styles{k});

    % Enable hold only after the first curve is drawn, so that curve
    % replaces whatever was previously on the current axes.
    hold on;

    labels{k} = sprintf('alpha = %g', alphas(k));
end

xlabel('Iteration');
ylabel('log(J)');
legend(labels);

% Release the hold so later, unrelated plots do not draw onto this figure.
hold off;
